from pathlib import Path
from ipumspy import IpumsApiClient, UsaExtract, readers, ddi, api
import gzip
import shutil
import os.path

# IPUMS API key passed to IpumsApiClient when submitting extract requests.
# To obtain one, create an account at ipums.org and then request an API key at
# https://account.ipums.org/api_keys
# NOTE(review): 'X' looks like a placeholder -- presumably it must be replaced
# with a real key before running; confirm.
IPUMS_API_KEY = 'X'


def download_stata_orig_data(json_name, stata_name):
    """Request an IPUMS extract and save it as an uncompressed Stata file.

    The extract definition is read from ``empirical/data_orig/{json_name}``,
    forced to the Stata data format, submitted through the IPUMS API and
    downloaded into ``empirical/data_orig/``. The downloaded
    ``cps_<extract id>.dta.gz`` archive is then decompressed to
    ``empirical/data_orig/{stata_name}``. All paths are relative to the
    current working directory.

    Args:
        json_name: File name of the JSON extract definition located in
            ``empirical/data_orig/``.
        stata_name: File name of the decompressed ``.dta`` file to write
            into ``empirical/data_orig/``.
    """
    extract = api.extract.define_extract_from_json(f'empirical/data_orig/{json_name}')
    # Ensure the extract is delivered in Stata format, whatever the JSON says.
    extract.data_format = "stata"

    # Submit an API extract request.
    ipums = IpumsApiClient(IPUMS_API_KEY)
    ipums.submit_extract(extract)
    print(f"Extract submitted for file {stata_name} with id {extract.extract_id} -- need to wait for file to be downloaded...")

    # Wait for the extract to finish, then download it.
    ipums.wait_for_extract(extract)
    ipums.download_extract(extract, download_dir='empirical/data_orig/')

    # The archive name embeds the zero-padded extract id. The 'cps_' prefix is
    # hard-coded, so this assumes the JSON definition describes a CPS extract.
    gz_path = 'empirical/data_orig/cps_{:05d}.dta.gz'.format(extract.extract_id)

    # The read mode belongs to gzip.open(); it was previously passed to
    # str.format() by mistake and only worked because 'rb' is the default.
    with gzip.open(gz_path, 'rb') as f_in, \
            open(f'empirical/data_orig/{stata_name}', 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    print(f'Finished extracting {stata_name}')



if __name__ == '__main__':

    # Maps each target Stata file name to the JSON extract definition that
    # produces it; both live in empirical/data_orig/.
    stata_json_requests = {
        'cps_longdata.dta': 'json_cps_longdata.txt',
        'cps_motivation.dta': 'json_cps_motivation.txt',
        'cps_regression.dta': 'json_cps_regression.txt',
    }

    # Only request extracts whose data file does not exist yet. The check must
    # look in empirical/data_orig/, where download_stata_orig_data() writes the
    # file; checking the bare file name would re-download on every run.
    for stata_name, json_name in stata_json_requests.items():
        if not os.path.isfile(f'empirical/data_orig/{stata_name}'):
            download_stata_orig_data(json_name, stata_name)

